{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Approach:\n",
    "    - Crawl the hyponym tree for the word \"structure\" in Wordnet -- get every hyponym of every word that is a hyponym of \"structure\".\n",
    "    - Repeat for \"vegetation\", \"body of water\", \"geological formation\"\n",
    "\n",
    "\n",
    "    - Lowercase and strip the words.\n",
    "    - Write the list to a .txt file in the /data/ folder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.corpus import wordnet\n",
    "\n",
    "# Seed words whose WordNet hyponym trees are explored and crawled below.\n",
    "# Multi-word entries are joined with underscores.\n",
    "SEED = ['structure', 'vegetation', 'body_of_water', 'geological_formation']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def explore(word):\n",
    "    \"\"\"Print every WordNet sense of `word`: its name, its definition and up to five direct hyponyms.\"\"\"\n",
    "    for sense in wordnet.synsets(word):\n",
    "        children = sense.hyponyms()  # direct hyponyms only, not the whole subtree\n",
    "        print(sense.name(), \"| Definition:\", sense.definition())\n",
    "        print(len(children), \"hyponyms:\", children[:5], \"...\")\n",
    "        print(\"----------\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "###################\n",
      "\n",
      "structure.n.01 | Definition: a thing constructed; a complex entity constructed of many parts\n",
      "64 hyponyms: [Synset('airdock.n.01'), Synset('altar.n.02'), Synset('arcade.n.02'), Synset('arch.n.04'), Synset('area.n.05')] ...\n",
      "----------\n",
      "structure.n.02 | Definition: the manner of construction of something and the arrangement of its parts\n",
      "4 hyponyms: [Synset('computer_architecture.n.02'), Synset('cytoarchitecture.n.01'), Synset('framework.n.02'), Synset('infrastructure.n.01')] ...\n",
      "----------\n",
      "structure.n.03 | Definition: the complex composition of knowledge as elements and their combinations\n",
      "4 hyponyms: [Synset('arrangement.n.03'), Synset('form.n.03'), Synset('morphology.n.03'), Synset('syntax.n.01')] ...\n",
      "----------\n",
      "structure.n.04 | Definition: a particular complex anatomical part of a living thing\n",
      "62 hyponyms: [Synset('alveolar_bed.n.01'), Synset('apodeme.n.01'), Synset('aster.n.02'), Synset('ball.n.10'), Synset('blade.n.06')] ...\n",
      "----------\n",
      "social_organization.n.01 | Definition: the people in a society considered as a system organized by a characteristic pattern of relationships\n",
      "7 hyponyms: [Synset('class_structure.n.01'), Synset('feudalism.n.01'), Synset('matriarchy.n.01'), Synset('meritocracy.n.01'), Synset('patriarchy.n.01')] ...\n",
      "----------\n",
      "structure.v.01 | Definition: give a structure to\n",
      "1 hyponyms: [Synset('restructure.v.01')] ...\n",
      "----------\n",
      "\n",
      "\n",
      "###################\n",
      "\n",
      "vegetation.n.01 | Definition: all the plant life in a particular region or period\n",
      "10 hyponyms: [Synset('brier.n.01'), Synset('browse.n.01'), Synset('brush.n.01'), Synset('forest.n.01'), Synset('garden.n.02')] ...\n",
      "----------\n",
      "vegetation.n.02 | Definition: the process of growth in plants\n",
      "0 hyponyms: [] ...\n",
      "----------\n",
      "vegetation.n.03 | Definition: an abnormal growth or excrescence (especially a warty excrescence on the valves of the heart)\n",
      "0 hyponyms: [] ...\n",
      "----------\n",
      "vegetation.n.04 | Definition: inactivity that is passive and monotonous, comparable to the inactivity of plant life\n",
      "0 hyponyms: [] ...\n",
      "----------\n",
      "\n",
      "\n",
      "###################\n",
      "\n",
      "body_of_water.n.01 | Definition: the part of the earth's surface covered with water (such as a river or lake or ocean)\n",
      "25 hyponyms: [Synset('backwater.n.01'), Synset('bay.n.01'), Synset('channel.n.04'), Synset('drink.n.04'), Synset('estuary.n.01')] ...\n",
      "----------\n",
      "\n",
      "\n",
      "###################\n",
      "\n",
      "geological_formation.n.01 | Definition: (geology) the geological features of the earth\n",
      "28 hyponyms: [Synset('aquifer.n.01'), Synset('beach.n.01'), Synset('cave.n.01'), Synset('cliff.n.01'), Synset('delta.n.01')] ...\n",
      "----------\n"
     ]
    }
   ],
   "source": [
    "# Print every WordNet sense (with a hyponym preview) of each seed word, one banner per word.\n",
    "for word in SEED:\n",
    "    print(\"\\n\\n###################\\n\")\n",
    "    explore(word)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# def get_hyponyms(synset):\n",
    "#     all_hyps = []\n",
    "#     print(\"\\n-------\\nFunction Call:: {} has {} hyponyms: {}\\n-------\".format(synset, len(synset.hyponyms()), synset.hyponyms()))\n",
    "#     for hyp in synset.hyponyms():\n",
    "#         all_hyps.append(hyp)\n",
    "#         print(hyp, len(hyp.hyponyms()))\n",
    "#         if hyp.hyponyms():\n",
    "#             print(\"There are more for:\", hyp)\n",
    "#             all_hyps.extend(get_hyponyms(hyp))\n",
    "#     return all_hyps\n",
    "\n",
    "\n",
    "\n",
    "# I get the same numbers with different code. Manually looked at four different levels of the Hyponym tree.\n",
    "# All four are present in the lexicon: arch -> pointed_arch -> gothic_arch -> lancet_arch\n",
    "# Note that there are different senses, vegetation.n.01 has 10 hyponyms but vegetation.n.02 has none.\n",
    "# So that probably led to the miscount."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_hyponyms(synset):\n",
    "    hyponyms = set()\n",
    "    for hyponym in synset.hyponyms():\n",
    "        hyponyms |= set(get_hyponyms(hyponym))\n",
    "    return hyponyms | set(synset.hyponyms())\n",
    "\n",
    "\n",
    "def traverse(word):\n",
    "    \"\"\"\n",
    "    Traverse the hyponym tree below the first noun sense of `word`.\n",
    "\n",
    "    Only the `<word>.n.01` synset is used; other senses are ignored.\n",
    "    Prints the number of hyponyms found and a five-item sample.\n",
    "    Returns a list holding the head lemma of each hyponym synset (underscores\n",
    "    kept); synsets that share a head lemma produce repeated entries.\n",
    "    \"\"\"\n",
    "    syn = wordnet.synset(word + '.n.01')  # only considering the first sense\n",
    "    hyp = list(get_hyponyms(syn))\n",
    "\n",
    "    # Synset names look like '<lemma>.<pos>.<nn>'. Split from the right so a\n",
    "    # lemma that itself contains a period is not truncated.\n",
    "    hyp_names = [s.name().rsplit('.', 2)[0] for s in hyp]\n",
    "\n",
    "    print(\"\\n-----\\nTotal Hyponyms:\", len(hyp), len(hyp_names))\n",
    "    print(\"Hyp:\", hyp[:5])\n",
    "    return hyp_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "-----\n",
      "Total Hyponyms: 1405 1405\n",
      "Hyp: [Synset('hotel-casino.n.02'), Synset('mess.n.05'), Synset('floor.n.10'), Synset('university.n.02'), Synset('cafeteria.n.01')]\n",
      "\n",
      "-----\n",
      "Total Hyponyms: 27 27\n",
      "Hyp: [Synset('scrub.n.01'), Synset('old_growth.n.01'), Synset('tropical_rain_forest.n.01'), Synset('baby's_tears.n.01'), Synset('bosk.n.01')]\n",
      "\n",
      "-----\n",
      "Total Hyponyms: 75 75\n",
      "Hyp: [Synset('high_sea.n.01'), Synset('inlet.n.01'), Synset('oxbow_lake.n.01'), Synset('horsepond.n.01'), Synset('liman.n.01')]\n",
      "\n",
      "-----\n",
      "Total Hyponyms: 175 175\n",
      "Hyp: [Synset('seif_dune.n.01'), Synset('relict.n.02'), Synset('continental_slope.n.01'), Synset('basin.n.03'), Synset('atoll.n.01')]\n"
     ]
    }
   ],
   "source": [
    "# Gather the hyponym names of every seed word.\n",
    "places = []\n",
    "for word in SEED:\n",
    "    places.extend(traverse(word))\n",
    "\n",
    "# De-duplicate and sort so the result is identical on every run\n",
    "# (plain set iteration order changes between kernels).\n",
    "places = sorted(set(places))\n",
    "\n",
    "# Normalise (underscores -> spaces, trim, lowercase) *before* the final\n",
    "# de-duplication so entries that differ only in formatting collapse into one.\n",
    "places_vocab = sorted({p.replace(\"_\", \" \").strip().lower() for p in places})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Total: 1560\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['hill',\n",
       " 'natural elevation',\n",
       " 'living room',\n",
       " 'public works',\n",
       " 'apadana',\n",
       " 'flashboard',\n",
       " 'meat house',\n",
       " 'basilica',\n",
       " 'gun enclosure',\n",
       " 'mental hospital',\n",
       " 'command module',\n",
       " 'rabbit hutch',\n",
       " 'garage',\n",
       " 'bungalow',\n",
       " 'drawbridge',\n",
       " 'stronghold',\n",
       " 'torture chamber',\n",
       " 'earplug',\n",
       " 'drop arch',\n",
       " 'deck',\n",
       " 'minster',\n",
       " 'drip',\n",
       " 'catchall',\n",
       " 'guyot',\n",
       " 'compound',\n",
       " 'cupola',\n",
       " 'quartering',\n",
       " 'fan vaulting',\n",
       " 'kraal',\n",
       " 'yurt',\n",
       " 'safety arch',\n",
       " 'drugstore',\n",
       " 'vault',\n",
       " 'studio apartment',\n",
       " 'walk-in',\n",
       " 'shower stall',\n",
       " 'music school',\n",
       " 'home',\n",
       " 'showroom',\n",
       " 'teashop',\n",
       " 'loft',\n",
       " 'open-air market',\n",
       " 'court',\n",
       " 'symmetry',\n",
       " 'gorge',\n",
       " 'pen',\n",
       " 'window',\n",
       " 'music hall',\n",
       " 'pedestal',\n",
       " 'picture window',\n",
       " 'ledge',\n",
       " 'brae',\n",
       " 'conformation',\n",
       " 'spot',\n",
       " 'strait',\n",
       " 'rill',\n",
       " 'oxbridge',\n",
       " 'government building',\n",
       " 'hayrack',\n",
       " 'reef',\n",
       " 'footbridge',\n",
       " 'scrub',\n",
       " 'triclinium',\n",
       " 'food court',\n",
       " 'grate',\n",
       " 'round arch',\n",
       " 'stall',\n",
       " 'arete',\n",
       " 'dry dock',\n",
       " 'cloud chamber',\n",
       " 'interior door',\n",
       " 'dry masonry',\n",
       " 'log cabin',\n",
       " 'dinette',\n",
       " 'stately home',\n",
       " 'tennis camp',\n",
       " 'convenience store',\n",
       " 'canteen',\n",
       " 'control center',\n",
       " 'fly tent',\n",
       " 'flatlet',\n",
       " \"pawnbroker's shop\",\n",
       " 'down',\n",
       " 'pannier',\n",
       " 'sickroom',\n",
       " 'crater',\n",
       " 'bethel',\n",
       " 'morning room',\n",
       " 'piedmont',\n",
       " 'pond',\n",
       " 'tepee',\n",
       " 'nailhead',\n",
       " 'ridge',\n",
       " 'barn',\n",
       " 'pavilion',\n",
       " 'seashore',\n",
       " 'trestle',\n",
       " 'manor hall',\n",
       " 'rift valley',\n",
       " 'outhouse',\n",
       " 'plywood',\n",
       " 'school system',\n",
       " 'pothole',\n",
       " 'bailey',\n",
       " 'hearth',\n",
       " 'oracle',\n",
       " 'sauna',\n",
       " 'hotel-casino',\n",
       " 'double glazing',\n",
       " 'corbel arch',\n",
       " 'outwork',\n",
       " 'ski lodge',\n",
       " 'dead-air space',\n",
       " 'prong',\n",
       " 'meat safe',\n",
       " 'hardware store',\n",
       " 'platform',\n",
       " 'commissary',\n",
       " 'hurdle',\n",
       " 'furnace',\n",
       " 'speed bump',\n",
       " 'window frame',\n",
       " 'country house',\n",
       " 'seven seas',\n",
       " 'wellhead',\n",
       " 'burrow',\n",
       " 'tollbooth',\n",
       " 'precipice',\n",
       " 'adjoining room',\n",
       " 'prison camp',\n",
       " 'disco',\n",
       " 'patisserie',\n",
       " 'apartment',\n",
       " \"ladies' room\",\n",
       " 'wainscoting',\n",
       " 'telephone booth',\n",
       " 'door',\n",
       " 'villa',\n",
       " 'waterside',\n",
       " 'tambour',\n",
       " 'french door',\n",
       " 'suspension bridge',\n",
       " 'consulate',\n",
       " 'canyon',\n",
       " 'gazebo',\n",
       " 'homestead',\n",
       " 'priory',\n",
       " 'outfitter',\n",
       " 'steam bath',\n",
       " 'roman arch',\n",
       " 'quartz battery',\n",
       " 'ascent',\n",
       " 'departure lounge',\n",
       " 'greenhouse',\n",
       " 'gift shop',\n",
       " 'gable',\n",
       " 'harbor',\n",
       " 'canyonside',\n",
       " 'slave market',\n",
       " 'breakwater',\n",
       " 'alcazar',\n",
       " 'classroom',\n",
       " 'cottage tent',\n",
       " 'presence chamber',\n",
       " 'academy',\n",
       " 'bomb rack',\n",
       " 'headquarters',\n",
       " 'fire tower',\n",
       " 'birdcage',\n",
       " 'cafe',\n",
       " 'distributary',\n",
       " 'barnyard',\n",
       " 'call center',\n",
       " 'railroad flat',\n",
       " 'lunchroom',\n",
       " 'borrow pit',\n",
       " 'radio chassis',\n",
       " 'retaining wall',\n",
       " 'choir loft',\n",
       " 'reverberatory furnace',\n",
       " 'sudatorium',\n",
       " 'cataract',\n",
       " 'sweatshop',\n",
       " 'stassano furnace',\n",
       " 'atoll',\n",
       " 'liman',\n",
       " 'death camp',\n",
       " 'inlet',\n",
       " 'architecture',\n",
       " 'castle',\n",
       " 'sawdust saloon',\n",
       " 'trestle bridge',\n",
       " 'keep',\n",
       " 'support',\n",
       " 'holding cell',\n",
       " 'butte',\n",
       " 'milldam',\n",
       " 'voting booth',\n",
       " 'chainlink fence',\n",
       " 'covered bridge',\n",
       " 'water closet',\n",
       " 'divot',\n",
       " 'charterhouse',\n",
       " 'grillroom',\n",
       " 'smokehouse',\n",
       " 'aviary',\n",
       " 'synagogue',\n",
       " 'firing chamber',\n",
       " 'mooring tower',\n",
       " 'growth',\n",
       " 'conning tower',\n",
       " 'fire',\n",
       " 'cheval-de-frise',\n",
       " 'trellis',\n",
       " 'geyser',\n",
       " 'chandlery',\n",
       " 'backwater',\n",
       " 'set-back',\n",
       " 'hold',\n",
       " 'storm door',\n",
       " 'fence',\n",
       " 'plenum',\n",
       " 'privet hedge',\n",
       " 'pump house',\n",
       " 'glen',\n",
       " 'tent',\n",
       " 'escarpment',\n",
       " 'mausoleum',\n",
       " 'room',\n",
       " 'firebox',\n",
       " 'main',\n",
       " 'concentration camp',\n",
       " 'backstop',\n",
       " 'center',\n",
       " 'settlement house',\n",
       " 'brickwork',\n",
       " 'earthwork',\n",
       " 'oil burner',\n",
       " 'hotel room',\n",
       " 'dungeon',\n",
       " 'prefab',\n",
       " 'slope',\n",
       " 'cookfire',\n",
       " 'cable car',\n",
       " 'washroom',\n",
       " 'masonry',\n",
       " 'reading room',\n",
       " 'water hazard',\n",
       " 'armory',\n",
       " 'maquiladora',\n",
       " 'range',\n",
       " 'mound',\n",
       " 'loan office',\n",
       " 'clubroom',\n",
       " 'ward',\n",
       " 'bazaar',\n",
       " 'second balcony',\n",
       " 'rood screen',\n",
       " 'branch',\n",
       " 'bosk',\n",
       " 'lagoon',\n",
       " 'conventicle',\n",
       " 'barrier reef',\n",
       " 'point',\n",
       " 'whitlowwort',\n",
       " 'refectory',\n",
       " 'brake cylinder',\n",
       " 'farmyard',\n",
       " 'storm cellar',\n",
       " 'chase',\n",
       " 'harem',\n",
       " 'kopje',\n",
       " 'landfall',\n",
       " 'supermarket',\n",
       " 'forge',\n",
       " 'trefoil arch',\n",
       " 'plate rack',\n",
       " 'parsonage',\n",
       " 'wicket',\n",
       " 'obstruction',\n",
       " 'cell',\n",
       " 'dale',\n",
       " 'portico',\n",
       " 'cockloft',\n",
       " 'luggage compartment',\n",
       " 'sliding window',\n",
       " 'clotheshorse',\n",
       " 'posthouse',\n",
       " 'nogging',\n",
       " 'tudor arch',\n",
       " 'joss house',\n",
       " 'forecastle',\n",
       " 'building supply store',\n",
       " 'landing',\n",
       " 'fife rail',\n",
       " 'sash',\n",
       " 'hogback',\n",
       " 'line of defense',\n",
       " 'cliff',\n",
       " 'undercarriage',\n",
       " 'echo chamber',\n",
       " 'bubble chamber',\n",
       " 'sun parlor',\n",
       " 'circus tent',\n",
       " 'streambed',\n",
       " 'acropolis',\n",
       " 'south sea',\n",
       " 'alp',\n",
       " 'tourist class',\n",
       " 'sandpit',\n",
       " 'terrarium',\n",
       " 'lodging house',\n",
       " 'toastrack',\n",
       " 'housing',\n",
       " 'brass',\n",
       " 'university',\n",
       " 'deep',\n",
       " 'boot camp',\n",
       " 'barrack',\n",
       " 'maisonette',\n",
       " 'ziggurat',\n",
       " 'screen door',\n",
       " 'lodge',\n",
       " 'ice hockey rink',\n",
       " 'dining room',\n",
       " 'belfry',\n",
       " 'fat farm',\n",
       " 'triumphal arch',\n",
       " 'superstructure',\n",
       " 'chancel',\n",
       " 'vacation home',\n",
       " 'rumpus room',\n",
       " 'tampon',\n",
       " 'free house',\n",
       " 'thriftshop',\n",
       " 'hospital',\n",
       " 'fishpond',\n",
       " 'towel rack',\n",
       " 'abbey',\n",
       " 'delicatessen',\n",
       " 'skybox',\n",
       " 'sugar refinery',\n",
       " 'porthole',\n",
       " 'laager',\n",
       " 'megalith',\n",
       " 'viaduct',\n",
       " 'fender',\n",
       " 'jury box',\n",
       " 'estuary',\n",
       " 'hacienda',\n",
       " 'haven',\n",
       " 'palisade',\n",
       " 'palace',\n",
       " 'pit',\n",
       " 'lounge',\n",
       " 'threshing floor',\n",
       " 'guide',\n",
       " 'gas oven',\n",
       " 'storage space',\n",
       " 'farmhouse',\n",
       " 'diner',\n",
       " 'barbecue',\n",
       " 'particle detector',\n",
       " 'water hole',\n",
       " 'resonator',\n",
       " 'cinema',\n",
       " 'icecap',\n",
       " 'drink',\n",
       " 'lamasery',\n",
       " 'framework',\n",
       " 'tidal basin',\n",
       " 'drainplug',\n",
       " 'work camp',\n",
       " 'climbing frame',\n",
       " 'pediment',\n",
       " 'volcanic crater',\n",
       " 'organ loft',\n",
       " 'honeycomb',\n",
       " 'borstal',\n",
       " 'computer store',\n",
       " 'press box',\n",
       " 'cabaret',\n",
       " 'movable barrier',\n",
       " 'diggings',\n",
       " 'den',\n",
       " 'dressing room',\n",
       " 'marina',\n",
       " 'spark chamber',\n",
       " 'purdah',\n",
       " 'boutique',\n",
       " 'mat',\n",
       " 'baggage claim',\n",
       " 'closet',\n",
       " 'reception room',\n",
       " 'double-hung window',\n",
       " 'lakefront',\n",
       " 'tumbler',\n",
       " 'post exchange',\n",
       " 'roman building',\n",
       " 'choir',\n",
       " 'blockhouse',\n",
       " 'steam chest',\n",
       " 'basin',\n",
       " 'health spa',\n",
       " 'outbuilding',\n",
       " 'tarn',\n",
       " 'bodywork',\n",
       " 'newsroom',\n",
       " 'bayou',\n",
       " 'amphitheater',\n",
       " 'rail fence',\n",
       " 'life office',\n",
       " 'bell tent',\n",
       " 'glory hole',\n",
       " 'bedroom',\n",
       " 'weir',\n",
       " 'field hospital',\n",
       " \"artist's loft\",\n",
       " 'sweatbox',\n",
       " 'coatrack',\n",
       " 'pointed arch',\n",
       " 'proscenium arch',\n",
       " 'boathouse',\n",
       " 'guantanamo bay',\n",
       " 'toll bridge',\n",
       " 'hyperbaric chamber',\n",
       " 'repository',\n",
       " 'altarpiece',\n",
       " 'gambling house',\n",
       " 'mud puddle',\n",
       " 'vaulting',\n",
       " 'skyscraper',\n",
       " 'shoe shop',\n",
       " 'seven wonders of the ancient world',\n",
       " 'bannister',\n",
       " 'gulf',\n",
       " 'scoinson arch',\n",
       " 'twilight zone',\n",
       " 'fuselage',\n",
       " 'national monument',\n",
       " 'foundation',\n",
       " 'wind tunnel',\n",
       " 'barn door',\n",
       " 'shower room',\n",
       " 'lamination',\n",
       " 'home room',\n",
       " 'stateroom',\n",
       " 'bistro',\n",
       " 'boarding house',\n",
       " 'esker',\n",
       " 'supporting structure',\n",
       " 'pantry',\n",
       " 'boot',\n",
       " 'water jump',\n",
       " 'lyceum',\n",
       " 'embankment',\n",
       " 'brush',\n",
       " 'government office',\n",
       " 'bay window',\n",
       " 'polar glacier',\n",
       " 'day nursery',\n",
       " 'mountainside',\n",
       " 'lake dwelling',\n",
       " 'massif',\n",
       " 'boardroom',\n",
       " 'grotto',\n",
       " 'shop',\n",
       " 'altar',\n",
       " 'turnstile',\n",
       " 'dormer window',\n",
       " 'clip joint',\n",
       " 'cog',\n",
       " 'service club',\n",
       " 'discount house',\n",
       " 'jungle',\n",
       " 'cage',\n",
       " 'tideland',\n",
       " 'natural depression',\n",
       " 'relict',\n",
       " 'defensive structure',\n",
       " 'gun carriage',\n",
       " 'pied-a-terre',\n",
       " 'automat',\n",
       " 'neritic zone',\n",
       " 'brook',\n",
       " 'shoal',\n",
       " 'oceanfront',\n",
       " 'stupa',\n",
       " 'building',\n",
       " 'tract house',\n",
       " 'guardroom',\n",
       " 'city hall',\n",
       " 'witness box',\n",
       " 'field tent',\n",
       " 'ditch',\n",
       " 'crag',\n",
       " 'place of worship',\n",
       " 'mouth',\n",
       " 'praetorium',\n",
       " 'study',\n",
       " 'polling booth',\n",
       " 'rathole',\n",
       " 'stockade',\n",
       " 'diplomatic building',\n",
       " 'laminate',\n",
       " 'hedge',\n",
       " 'creep',\n",
       " 'dry wall',\n",
       " 'anteroom',\n",
       " 'cybercafe',\n",
       " 'draw',\n",
       " 'sounding board',\n",
       " 'tap',\n",
       " 'dining area',\n",
       " 'sawpit',\n",
       " 'box office',\n",
       " 'beacon',\n",
       " 'carport',\n",
       " 'pop tent',\n",
       " 'convent',\n",
       " 'student union',\n",
       " 'crash barrier',\n",
       " 'hoop',\n",
       " 'grille',\n",
       " 'cirque',\n",
       " 'efficiency apartment',\n",
       " 'canebrake',\n",
       " 'reformatory',\n",
       " 'sickbay',\n",
       " 'erection',\n",
       " 'clock tower',\n",
       " 'gopher hole',\n",
       " 'scoreboard',\n",
       " 'bucket shop',\n",
       " 'control room',\n",
       " 'truss',\n",
       " 'coal house',\n",
       " 'hull',\n",
       " 'roadbed',\n",
       " 'passe-partout',\n",
       " 'war room',\n",
       " 'institution',\n",
       " 'cross',\n",
       " 'memorial',\n",
       " 'correctional institution',\n",
       " 'sandwich board',\n",
       " 'rodeo',\n",
       " 'tollgate',\n",
       " 'fantail',\n",
       " 'channel',\n",
       " 'sanitary landfill',\n",
       " 'toolshed',\n",
       " 'colonnade',\n",
       " 'safety rail',\n",
       " 'public house',\n",
       " 'stillroom',\n",
       " 'jobcentre',\n",
       " 'moorish arch',\n",
       " 'hippodrome',\n",
       " 'pantheon',\n",
       " 'barbette',\n",
       " 'bed',\n",
       " 'casino',\n",
       " 'abattis',\n",
       " 'scullery',\n",
       " 'carrier',\n",
       " 'area',\n",
       " 'roomette',\n",
       " 'box',\n",
       " 'breechblock',\n",
       " 'cow pen',\n",
       " 'cocktail lounge',\n",
       " 'hall',\n",
       " 'chassis',\n",
       " 'car',\n",
       " 'bridge',\n",
       " 'back porch',\n",
       " 'windbreak',\n",
       " 'fanlight',\n",
       " 'lockup',\n",
       " 'continental slope',\n",
       " 'promontory',\n",
       " 'squirrel cage',\n",
       " 'cafeteria',\n",
       " 'stocks',\n",
       " 'porch',\n",
       " 'tie rack',\n",
       " 'oriel',\n",
       " 'scriptorium',\n",
       " 'chicken coop',\n",
       " 'corbie gable',\n",
       " 'mid-water',\n",
       " 'chicken yard',\n",
       " 'batwing',\n",
       " 'chamber',\n",
       " 'tower',\n",
       " 'butcher shop',\n",
       " 'menhir',\n",
       " 'boarding',\n",
       " 'waterway',\n",
       " 'hayloft',\n",
       " 'deck-house',\n",
       " 'kitchenette',\n",
       " 'dacha',\n",
       " 'mantelet',\n",
       " 'planetarium',\n",
       " 'brattice',\n",
       " 'obelisk',\n",
       " 'bank',\n",
       " 'tooth',\n",
       " 'pump well',\n",
       " 'barricade',\n",
       " 'steakhouse',\n",
       " 'turret',\n",
       " 'rest house',\n",
       " 'module',\n",
       " 'temple',\n",
       " 'mere',\n",
       " 'barbican',\n",
       " 'cooler',\n",
       " 'junk shop',\n",
       " 'dude ranch',\n",
       " 'automobile factory',\n",
       " 'lake',\n",
       " 'retrenchment',\n",
       " 'tidal river',\n",
       " 'steel mill',\n",
       " 'skeen arch',\n",
       " 'coast',\n",
       " 'narrow',\n",
       " 'oxeye',\n",
       " 'shoebox',\n",
       " 'lake bed',\n",
       " 'lanai',\n",
       " 'seamount',\n",
       " 'arm',\n",
       " 'head',\n",
       " 'condominium',\n",
       " 'plaza',\n",
       " 'polynya',\n",
       " 'billiard room',\n",
       " 'landfill',\n",
       " 'lookout',\n",
       " 'lobe',\n",
       " 'sty',\n",
       " 'state prison',\n",
       " 'circus',\n",
       " 'feedlot',\n",
       " 'lough',\n",
       " 'tailgate',\n",
       " 'smelter',\n",
       " 'supply closet',\n",
       " 'ranch house',\n",
       " 'alcove',\n",
       " 'handbarrow',\n",
       " 'muffle',\n",
       " 'detox',\n",
       " 'trimmer arch',\n",
       " 'massage parlor',\n",
       " 'skylight',\n",
       " 'wormhole',\n",
       " 'tupik',\n",
       " 'key',\n",
       " 'abutment arch',\n",
       " 'lancet arch',\n",
       " 'recycling plant',\n",
       " 'textile mill',\n",
       " 'jail',\n",
       " \"men's room\",\n",
       " 'mull',\n",
       " 'broken arch',\n",
       " 'picket fence',\n",
       " 'dripstone',\n",
       " 'monastery',\n",
       " 'water table',\n",
       " 'airframe',\n",
       " 'sod house',\n",
       " 'quad',\n",
       " \"farmer's market\",\n",
       " 'conservatory',\n",
       " 'library',\n",
       " 'back room',\n",
       " 'delta',\n",
       " 'ways',\n",
       " 'airlock',\n",
       " 'double door',\n",
       " 'strand',\n",
       " 'caisson',\n",
       " 'column',\n",
       " 'open-hearth furnace',\n",
       " 'side chapel',\n",
       " 'joint',\n",
       " 'casing',\n",
       " 'vestry',\n",
       " 'grove',\n",
       " 'mountain tent',\n",
       " 'cliff dwelling',\n",
       " 'maternity ward',\n",
       " 'saltworks',\n",
       " 'study hall',\n",
       " 'mukataa',\n",
       " 'caravansary',\n",
       " 'derrick',\n",
       " 'prison',\n",
       " 'underbrush',\n",
       " 'merlon',\n",
       " 'fallout shelter',\n",
       " 'walk-up apartment',\n",
       " 'glove compartment',\n",
       " 'jungle gym',\n",
       " 'shrine',\n",
       " 'jet bridge',\n",
       " 'boudoir',\n",
       " 'clean room',\n",
       " 'drinking fountain',\n",
       " 'continental shelf',\n",
       " 'bill',\n",
       " 'defilade',\n",
       " 'cellblock',\n",
       " 'storm window',\n",
       " 'customhouse',\n",
       " 'obstacle',\n",
       " 'grape arbor',\n",
       " 'party wall',\n",
       " 'socle',\n",
       " 'storeroom',\n",
       " 'city university',\n",
       " 'mihrab',\n",
       " 'gantry',\n",
       " 'terrace',\n",
       " 'dome',\n",
       " 'clerestory',\n",
       " 'icehouse',\n",
       " 'control tower',\n",
       " 'lychgate',\n",
       " 'browse',\n",
       " 'clinic',\n",
       " 'ocean floor',\n",
       " 'estaminet',\n",
       " 'pagoda',\n",
       " 'umbrella tent',\n",
       " 'detached house',\n",
       " 'shore',\n",
       " \"child's room\",\n",
       " 'stretcher',\n",
       " 'old growth',\n",
       " 'home theater',\n",
       " 'observatory',\n",
       " 'corner',\n",
       " 'barroom',\n",
       " 'alehouse',\n",
       " 'opera',\n",
       " 'apartment building',\n",
       " 'bicycle rack',\n",
       " 'stained-glass window',\n",
       " 'volcano',\n",
       " 'wash',\n",
       " 'lunette',\n",
       " 'coral reef',\n",
       " 'stockyard',\n",
       " 'diapir',\n",
       " 'levee',\n",
       " 'folium',\n",
       " 'coaming',\n",
       " 'coffee stall',\n",
       " 'bonanza',\n",
       " 'knoll',\n",
       " 'front porch',\n",
       " 'agora',\n",
       " 'firewall',\n",
       " 'ice field',\n",
       " 'grill',\n",
       " 'drawer',\n",
       " 'stable',\n",
       " 'oeil de boeuf',\n",
       " 'hole',\n",
       " 'safe house',\n",
       " 'motel',\n",
       " 'airing cupboard',\n",
       " 'hospital room',\n",
       " 'wadi',\n",
       " 'juke',\n",
       " 'divan',\n",
       " 'fieldwork',\n",
       " 'hindrance',\n",
       " 'stand',\n",
       " 'flophouse',\n",
       " 'boards',\n",
       " 'bell foundry',\n",
       " 'grocery store',\n",
       " 'morgue',\n",
       " 'tail gate',\n",
       " 'ogee arch',\n",
       " 'bight',\n",
       " 'lug',\n",
       " 'bioscope',\n",
       " 'press gallery',\n",
       " 'mount',\n",
       " 'chokey',\n",
       " 'college',\n",
       " 'anthill',\n",
       " 'high altar',\n",
       " 'hillside',\n",
       " 'chateau',\n",
       " 'public toilet',\n",
       " 'screen',\n",
       " 'core',\n",
       " 'pilothouse',\n",
       " 'pin',\n",
       " 'well',\n",
       " 'guesthouse',\n",
       " 'trumpet arch',\n",
       " 'orchestra pit',\n",
       " 'bell tower',\n",
       " 'church',\n",
       " 'trap door',\n",
       " 'mezzanine',\n",
       " 'stoop',\n",
       " 'squad room',\n",
       " 'tithe barn',\n",
       " 'cargo door',\n",
       " 'chapterhouse',\n",
       " 'french window',\n",
       " 'fjord',\n",
       " 'livery stable',\n",
       " 'locker room',\n",
       " 'cork',\n",
       " 'concert hall',\n",
       " 'warren',\n",
       " \"batter's box\",\n",
       " 'mounting',\n",
       " 'knob',\n",
       " 'tract housing',\n",
       " 'hot spot',\n",
       " 'clothing store',\n",
       " 'office',\n",
       " 'lean-to tent',\n",
       " 'dormer',\n",
       " 'specialty store',\n",
       " 'false bottom',\n",
       " 'frame',\n",
       " 'rotunda',\n",
       " 'pup tent',\n",
       " 'bluff',\n",
       " 'art school',\n",
       " 'bathhouse',\n",
       " 'mudguard',\n",
       " 'summer house',\n",
       " 'chain store',\n",
       " 'cardroom',\n",
       " 'checkroom',\n",
       " 'trailer camp',\n",
       " 'signboard',\n",
       " 'guestroom',\n",
       " 'lock-gate',\n",
       " 'resort hotel',\n",
       " 'pull-off',\n",
       " 'blast furnace',\n",
       " 'slopshop',\n",
       " 'fountain of youth',\n",
       " 'pinnacle',\n",
       " 'bulwark',\n",
       " 'cabinet',\n",
       " 'vacuum chamber',\n",
       " 'kiln',\n",
       " 'formica',\n",
       " 'greenroom',\n",
       " 'tableland',\n",
       " 'camera obscura',\n",
       " 'cubby',\n",
       " 'barbecue pit',\n",
       " 'fixer-upper',\n",
       " 'pump room',\n",
       " 'dining-hall',\n",
       " 'town hall',\n",
       " 'cupboard',\n",
       " 'fortification',\n",
       " 'caff',\n",
       " 'turnaround',\n",
       " 'beach',\n",
       " 'body',\n",
       " 'brickkiln',\n",
       " 'auditorium',\n",
       " 'fireplace',\n",
       " 'post and lintel',\n",
       " 'smoking room',\n",
       " 'pontoon bridge',\n",
       " 'stuffing box',\n",
       " 'guildhall',\n",
       " 'hot spring',\n",
       " 'prop',\n",
       " 'offing',\n",
       " 'cab',\n",
       " 'oast house',\n",
       " 'shed',\n",
       " 'shooting gallery',\n",
       " 'penal institution',\n",
       " 'lancet window',\n",
       " 'stockroom',\n",
       " 'ritz',\n",
       " 'cellarage',\n",
       " 'overpass',\n",
       " 'second growth',\n",
       " 'hermitage',\n",
       " 'pizzeria',\n",
       " 'latrine',\n",
       " 'sea',\n",
       " 'telecom hotel',\n",
       " 'kirk',\n",
       " 'forecourt',\n",
       " 'stonework',\n",
       " 'parvis',\n",
       " 'picture frame',\n",
       " \"baby's tears\",\n",
       " 'courthouse',\n",
       " 'swale',\n",
       " 'bedlam',\n",
       " 'cloister',\n",
       " 'narthex',\n",
       " 'rose window',\n",
       " 'vapor lock',\n",
       " 'suntrap',\n",
       " 'medical building',\n",
       " 'sawhorse',\n",
       " 'stabling',\n",
       " 'turnpike',\n",
       " 'paper mill',\n",
       " 'calk',\n",
       " 'kremlin',\n",
       " 'seif dune',\n",
       " 'pool',\n",
       " 'funeral home',\n",
       " 'canvas tent',\n",
       " 'wall tent',\n",
       " 'labor camp',\n",
       " 'clubhouse',\n",
       " 'gun room',\n",
       " 'shouldered arch',\n",
       " 'surgery',\n",
       " 'master bedroom',\n",
       " 'mudhif',\n",
       " 'rapid',\n",
       " 'partition',\n",
       " 'oxbow',\n",
       " 'tobacco shop',\n",
       " 'display window',\n",
       " 'lowland',\n",
       " 'steep',\n",
       " 'lattice',\n",
       " 'cleat',\n",
       " 'transom',\n",
       " 'marketplace',\n",
       " 'clog',\n",
       " 'two-man tent',\n",
       " 'swing door',\n",
       " 'billet',\n",
       " 'louvered window',\n",
       " 'common room',\n",
       " 'steeple',\n",
       " 'military hospital',\n",
       " 'hutment',\n",
       " 'steel arch bridge',\n",
       " 'couchette',\n",
       " 'groundcover',\n",
       " 'head shop',\n",
       " 'rupturewort',\n",
       " 'pier arch',\n",
       " 'lazaretto',\n",
       " 'gallery',\n",
       " 'hall of fame',\n",
       " 'workroom',\n",
       " 'microbrewery',\n",
       " 'nacelle',\n",
       " 'luggage rack',\n",
       " 'shebeen',\n",
       " 'gut',\n",
       " 'messuage',\n",
       " 'snap brim',\n",
       " 'booth',\n",
       " 'tropical rain forest',\n",
       " 'opium den',\n",
       " 'prompt box',\n",
       " 'detention home',\n",
       " 'shrubbery',\n",
       " 'bi-fold door',\n",
       " 'stacks',\n",
       " 'rolling mill',\n",
       " 'sail',\n",
       " 'drawing room',\n",
       " 'death house',\n",
       " 'attic',\n",
       " 'rabbit burrow',\n",
       " 'dwelling',\n",
       " 'watchtower',\n",
       " 'school',\n",
       " 'office building',\n",
       " 'capitol',\n",
       " 'trestlework',\n",
       " 'safehold',\n",
       " 'oil refinery',\n",
       " ...]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"\\nTotal:\", len(places_vocab))\n",
    "places_vocab[:25]  # preview only; displaying the whole lexicon bloats the saved notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open('/Users/sunyambagga/Desktop/txtLAB-2/minimal-narrativity/data/places_lexicon.txt', 'w') as F:\n",
    "#     for p in places_vocab:\n",
    "#         F.write(p)\n",
    "#         F.write('\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fin."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
